library(magrittr)
library(tidyr)
library(dplyr)
library(plotly)
library(ggrepel)
Initialisation and data import
source("scripts/init.R")
source("scripts/import_data.R")
source("scripts/data_phenot_get_parameters_std.R")
source("scripts/data_phenot_outliers_exclusion_std.R")
glimpse(data_phenot_parms_clean)
## Rows: 1,981
## Columns: 6
## $ robot_id <chr> "R20-20210930-001", "R20-20210930-001", "R20-20210930-001"~
## $ strain_name <chr> "LB-PD7-T1-1", "LB-PD7-T1-1", "LB-PD7-T1-1", "LB-PD7-T1-1"~
## $ bloc <fct> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3~
## $ bloc_month <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ parameter <chr> "cell_t0", "co2max", "death_prct", "lag", "pop_size", "tvm~
## $ value <dbl> 9.810000e+05, 2.223684e+01, 1.330000e+01, 9.868000e+00, 6.~
Phenotypic
data_phenot_parms_clean_wide <- data_phenot_parms_clean %>%
pivot_wider(names_from = "parameter",
values_from = "value") %>%
group_by(strain_name) %>%
mutate(replicate = paste0(strain_name, ".", row_number())) %>%
column_to_rownames(var = "replicate") %>%
ungroup() %>%
mutate(baker_id = strains$baker_id[match(strain_name, strains$strain_name)])
data_phenot_parms_clean_mat <- data_phenot_parms_clean_wide %>%
select(names(.)[names(.) %in% data_phenot_parms_clean$parameter]) %>%
as.matrix()
pca_pheno <- prcomp(data_phenot_parms_clean_mat, rank. = 3, scale. = T)
components <- pca_pheno[["x"]]
components <- data.frame(components)
components$PC2 <- -components$PC2
components$PC3 <- -components$PC3
tot_explained_variance_ratio <- summary(pca_pheno)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio <- 100 * sum(tot_explained_variance_ratio)
fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3,
color = ~data_phenot_parms_clean_wide$baker_id,
colors = c('#636EFA','#EF553B','#00CC96','violet') ) %>%
add_markers(size = 12)
fig <- fig %>%
layout(
scene = list(bgcolor = "#e5ecf6")
)
fig
plot <- components %>%
mutate(baker_id = data_phenot_parms_clean_wide$baker_id,
strain_name = data_phenot_parms_clean_wide$strain_name) %>%
ggplot() +
aes(x = PC1, y = PC2, fill = baker_id, color = baker_id, group = strain_name) +
geom_point() +
geom_polygon(alpha = 0.2, color = alpha("white",0)) +
geom_text_repel(aes(label = strain_name , x = )) +
theme_minimal()
plot

plot + facet_wrap(~ baker_id)

Clustering
library(dendextend)
dm <- dist(x = scale(data_phenot_parms_clean_mat), method = "euclidean")
hc <- hclust(dm, method = "ward.D")
dg <- as.dendrogram(hc)
colorCodes <- c(LB="red", MB="green", AM="blue", PDC="yellow")
labels_colors(dg) <- colorCodes[data_phenot_parms_clean_wide$baker_id][order.dendrogram(dg)]
par(mar = c(1,1,1,10))
dg %>%
set("labels_cex",0.7) %>%
plot(., horiz=T, which.plots=2)
